{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "import torch\n",
    "import sys\n",
    "sys.path.append(\"../\")\n",
    "from initial_dataSet import DataSet"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "DataSet: MathEC\n"
     ]
    }
   ],
   "source": [
    "basedir = '../'\n",
    "dataSet_list = ('ASSIST_0910', 'ASSIST_2017', 'JUNYI','MathEC')\n",
    "dataSet_idx = 3\n",
    "\n",
    "data_set_name = dataSet_list[dataSet_idx]\n",
    "dataSet = DataSet(basedir, data_set_name)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {},
   "outputs": [],
   "source": [
    "Q=dataSet.get_exer_conc_adj()\n",
    "record=dataSet.record\n",
    "total_stu_list = dataSet.total_stu_list"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {},
   "outputs": [],
   "source": [
    "concept_num_list=[]\n",
    "item_num_list=[]\n",
    "for stu in total_stu_list:\n",
    "    stu_items=torch.LongTensor(record.loc[stu,'item_id'].values-1)\n",
    "    stu_concept_sum=Q[stu_items].sum(dim=0)\n",
    "    stu_concept_num=(stu_concept_sum>0).sum()\n",
    "    item_num_list.append(len(stu_items))\n",
    "    concept_num_list.append(stu_concept_num.item())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "平均每个习题考察知识点数： 1.1\n",
      "作答习题数： 15867850\n",
      "平均每个学习者作答题目数： 133.38\n",
      "平均每个学习者可被考察的知识点数： 34.05\n",
      "平均被考察的知识点占比： 0.09\n"
     ]
    }
   ],
   "source": [
    "mean_concepts=np.mean(concept_num_list)\n",
    "mean_concepts_retio=mean_concepts/dataSet.concept_num\n",
    "print('平均每个习题考察知识点数：',Q.sum(dim=1).mean().numpy().round(2))\n",
    "print('作答习题数：',record.shape[0])\n",
    "print('平均每个学习者作答题目数：',np.mean(item_num_list).round(2))\n",
    "print('平均每个学习者可被考察的知识点数：',mean_concepts.round(2))\n",
    "print('平均被考察的知识点占比：',mean_concepts_retio.round(2))"
   ]
  }
 ],
 "metadata": {
  "interpreter": {
   "hash": "527a93331b4b1a8345148922acc34427fb7591433d63b66d32040b6fbbc6d593"
  },
  "kernelspec": {
   "display_name": "Python 3.9.7 64-bit ('pytorch': conda)",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.12"
  },
  "orig_nbformat": 4
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
